
import org.apache.hadoop.hive.ql.exec.UDF;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Hive UDF: segments an input string into words using the IK Chinese
 * analyzer and joins the resulting tokens with "|".
 *
 * @author: jiashen
 * @date: 2021/9/14 4:56 PM
 */
public class WordsToWord extends UDF {

    /**
     * Segments {@code words} with the IK analyzer and joins the tokens with "|".
     *
     * @param words input text; may be {@code null} (Hive passes SQL NULL as null) or empty
     * @return the "|"-joined tokens; the input itself when it is null or empty;
     *         "" when segmentation produces no tokens
     */
    public String evaluate(String words) {
        // Hive hands SQL NULL to UDFs as a Java null — the null check must
        // come first or words.isEmpty() throws NullPointerException.
        if (words == null || words.isEmpty()) {
            return words;
        }
        List<String> tokens = analyze(words);
        return tokens.isEmpty() ? "" : String.join("|", tokens);
    }

    /**
     * Tokenizes {@code text} using the IK segmenter in smart mode.
     *
     * @param text non-null input text
     * @return the list of lexeme strings, possibly empty; never {@code null}.
     *         On an I/O error, returns whatever tokens were collected so far.
     */
    public static List<String> analyze(String text) {
        List<String> keywordList = new ArrayList<>();
        // true = "smart" segmentation mode (coarser-grained tokens).
        IKSegmenter ik = new IKSegmenter(new StringReader(text), true);
        try {
            Lexeme lex;
            while ((lex = ik.next()) != null) {
                keywordList.add(lex.getLexemeText());
            }
        } catch (IOException e) {
            // IKSegmenter.next() declares IOException even though a
            // StringReader cannot realistically throw; log and fall through
            // to return the partial result (best-effort, matches original intent).
            e.printStackTrace();
        }
        // NOTE(review): the original returned from a 'finally' block, which
        // silently discards any exception raised in 'try' — fixed by returning
        // here on the normal path instead.
        return keywordList;
    }

}
